	
********************************************************************************
** 	TITLE: b8_2_sag_gen_roa_analysis.do
**
**	PROJECT: IGNITE
** 
**  PURPOSE 1:  Append Genesee and Saginaw with Within County Recidivism Measure
**  PURPOSE 2:  Produce Genesee and Saginaw County Analysis samples 
**	with across-county measure of recidivism
** 
**	Note: Uses exports from B5 and B8_1.  
********************************************************************************

	set sortseed 13		// fixed seed so that ties in non-stable sorts break the same way on every run

********************************************************************************
* 1) Within-county measures: stack Genesee and Saginaw bookings
********************************************************************************

* ---- Genesee County (within-county recidivism) ----
use "$output_data/gen_appended.dta", clear
sort inmate booking_date, stable

* Identify unique individuals: split fullname ("last, first ...") into
* lower-case last name and lower-case first token of the given name.
generate last  = lower(trim(substr(fullname, 1, strpos(fullname, ",") - 1)))
generate first = trim(substr(subinstr(fullname, last, "", .), strpos(subinstr(fullname, last, "", .), ", ") + 2, .))
replace  first = substr(first, 1, strpos(first, " ") - 1) if strpos(first, " ")
replace  first = lower(trim(first))

* Keep identifiers, dates, controls and the within-county outcomes only
keep stay_rec inmate first last dob booking_date release_date ///
	$design_control $auxiliary_control ///
	return_*m genesee ignite_0_rec

* ---- Saginaw County (within-county recidivism) ----
append using "$output_data/sag_appended.dta"

* Put names from both sources on the same footing (lower case, no padding)
replace first = lower(trim(first))
replace last  = lower(trim(last))

* Fill age_17_24 from age_18_24 where it is missing, so both counties have
* the same set of non-missing controls
replace age_17_24 = age_18_24 if missing(age_17_24)

* Calendar fields derived from the booking date
replace month = month(booking_date)
capture drop year
generate year = year(booking_date)
capture drop ym_booked
generate ym_booked = mofd(booking_date)

* Proxy treatment flag: booked in or after the month of 9/08/2020 (monthly date 728)
capture drop D_ignite_proxy
generate D_ignite_proxy = ym_booked >= mofd(date("9/08/2020","MDY"))

* Sample window: booked on/after 1 Jan 2016 and before June 2022
keep if ym_booked < mofd(date("06/01/2022","MDY")) & booking_date >= date("01/01/2016","MDY")

* ignite_0_rec is forced to zero for every non-Genesee booking
replace ignite_0_rec = 0 if !genesee
generate quarter_booked = quarter(booking_date)

* ---- Genesee + Saginaw with within-county recidivism ----
save "$output_data/sag_gen_appended.dta", replace


/********************************************************************************
2) Produce Cross County Measures
*********************************************************************************/


use "$output_data/gen_appended.dta", clear

		sort inmate booking_date, stable

	* identify unique individuals: split fullname ("last, first ...") into
	* lower-case last name and lower-case first token of the given name
		gen last = lower(trim(substr(fullname,1,strpos(fullname,",")-1)))
		gen first = trim(substr(subinstr(fullname,last,"",.),strpos(subinstr(fullname,last,"",.),", ")+2,.))
		replace first  = substr(first,1,strpos(first," ")-1) if strpos(first," ")
		replace first  = lower(trim(first))

	append using "$output_data/sag_appended.dta"

	* Normalise names from BOTH counties before they are used as matching keys.
	* The within-county section of this file applies the same step after its
	* append; without it here, names that differ only by case or padding would
	* be assigned different sg_id values and cross-county returns would be missed.
	* The step is idempotent, so already-clean names are unaffected.
		foreach v in first last {
			replace `v' = lower(trim(`v'))
		}

		//Replacing age group so that both Sag and Genesee are the same in terms of non-missing controls
		replace age_17_24 = age_18_24 if missing(age_17_24)

		capture drop ym_booked
		gen ym_booked = mofd(booking_date)
		cap drop D_ignite_proxy
		// booked in or after the month of 9/08/2020 (monthly date 728)
		gen D_ignite_proxy = ym_booked>=  mofd(date("9/08/2020","MDY"))

		// fall back on any_resch where D_remove_any is missing
		replace D_remove_any = any_resch if mi(D_remove_any)

		cap drop interact
		gen interact = genesee*D_remove_any

	//interact_proxy_any replaces interact for Table V (C5) columns 3 and 4.
		cap drop interact_proxy_any
		gen interact_proxy_any = D_ignite_proxy*D_remove_any

		replace month_booked = month(booking_date)

//Combined Genesee/Saginaw individual ID: one value per (last, first, dob).
//egen group() leaves sg_id missing when any of the three keys is missing.
egen sg_id = group(last first dob)

//Saginaw County dummy, akin to Genesee County dummy
gen saginaw = !genesee

//case_id1-case_id15; case_nr1-case_nr18
//Create any case_id variable that does not exist yet, then fill gaps from case_nr
forval t=1/18{
	cap gen case_id`t' = ""
	replace case_id`t' = case_nr`t' if missing(case_id`t')
}

//Number of bookings each individual has in each county; the count reports
//bookings belonging to individuals observed in both jails
bys sg_id: egen sum_genesee = total(genesee)
bys sg_id: egen sum_saginaw = total(saginaw)
count if sum_genesee&sum_saginaw


//*********Handling individuals with duplicate sg_id booking_date
// Goal: end up with at most one row per (sg_id, booking_date).
// First report and tag the duplicates (tables = number of OTHER rows sharing
// the same sg_id and booking_date) and list them for visual inspection.
cap drop tables
duplicates report sg_id booking_date
duplicates tag sg_id booking_date, gen(tables)
codebook sg_id
list sg_id inmate first last dob booking_date release_date genesee stay_rec case_id1 tables release_reason1 if tables > 0

// Rows with missing sg_id cannot be linked to a person and are dropped.
// Author's note: those missing sg_id are virtually all in Saginaw, where the data were pulled incorrectly.
// The Genesee County observations lack case ids and dob, and have a stay of less than 1 day.
// They are usually missing DOB in the 12/22/2022 refresh sample, which is updated with demographics,
	// and they also lack case_id info.
drop if missing(sg_id)
// (77 observations deleted) -- count logged from a previous run

// Dropping duplicates.
// Author's note: duplicates arise when a person is booked in one jail and transferred to the
// neighboring jail the same day,
	// so booking and release coincide for one jail while the second jail has the longer stay.
// Priority within (sg_id, booking_date), implemented through the sort keys below (all ascending):
//   1. staying_0time = -stay_rec      -> largest stay_rec first (missing stay_rec sorts last)
//   2. no_roas_file                   -> rows WITH case_id1 first
//   3. saginaw                        -> Genesee rows first
//   4. noreleasereason                -> rows WITH release_reason1 first
gen no_roas_file= missing(case_id1)
gen staying_0time = -stay_rec
codebook saginaw
gen noreleasereason = missing(release_reason1)

// NOTE(review): sort is not stable; rows tied on all six keys are ordered
// according to the sortseed set at the top of this file.
sort sg_id booking_date staying_0time no_roas_file saginaw noreleasereason

// duplicates drop keeps the first occurrence in the current order, i.e. the
// highest-priority row from the sort above
duplicates drop sg_id booking_date, force
// (74 observations deleted) -- count logged from a previous run
// Remove the helper variables used only for the de-duplication
drop tables no_roas_file staying_0time noreleasereason



//******************** Time to recidivism (in 30-day months)
// Days from this release to the same person's next booking, divided by 30.
// Missing for a person's last observed booking.
bysort sg_id (booking_date release_date): generate return_time_sg = (booking_date[_n+1] - release_date)/30

// return_<m>m for m = 1..12:
//    1  re-booked within m months of release
//    0  not re-booked within m months
//   -9  prison == 1
//  -10  fewer than m*30 days between release and 05/10/2023 (takes precedence over -9)
// attrit_<m>m flags the -10 rows that are not prison rows.
forvalues m = 1/12 {
	capture drop return_`m'm
	generate return_`m'm = (return_time_sg <= `m')
	replace  return_`m'm = -9  if prison == 1
	replace  return_`m'm = -10 if (date("05/10/2023","MDY") - release_date) < `m'*30

	capture drop attrit_`m'm
	generate attrit_`m'm = ((return_`m'm == -10) & !prison)
}

// Booked in the past 1, 2 or 3 years
// N_record_sg: running booking number within person;
// prior_offense_time_sg: 30-day months since the person's previous booking.
bysort sg_id (booking_date release_date): generate N_record_sg = _n
bysort sg_id (booking_date release_date): generate prior_offense_time_sg = (booking_date - booking_date[_n-1])/30

forvalues y = 1/3 {
	capture drop prior_offense_`y'y
	bysort sg_id (booking_date release_date): generate prior_offense_`y'y = prior_offense_time_sg <= 12*`y' if N_record_sg > 1
	// a person's first observed booking has no prior booking: code as 0
	replace prior_offense_`y'y = 0 if missing(prior_offense_`y'y)
}

	// Interact each auxiliary control with the Genesee indicator (int_test_<control>)
	foreach ctrl of global auxiliary_control {
		generate int_test_`ctrl' = `ctrl' * genesee
	}

	// Interact each design control with the Genesee indicator (des_int_<control>)
	foreach ctrl of global design_control {
		generate des_int_`ctrl' = `ctrl' * genesee
	}
		
	// Number of re-bookings within 3, 6, 9 and 12 months of release.
	//
	// time_gap<k> = days from this release to the person's k-th subsequent
	// booking (k = 2..30), left missing when release_date is missing. Creation
	// is wrapped in capture, so a time_gap<k> that already exists is kept as is.
	forvalues k = 2/30 {
		capture quietly bysort sg_id (booking_date release_date): ///
			generate time_gap`k' = booking_date[_n+`k'] - release_date if !missing(release_date)
	}

	foreach w in 3 6 9 12 {
		capture drop num_return_`w'm
		// start from the 0/1 "returned within the window" flag ...
		generate num_return_`w'm = (return_`w'm == 1)
		// ... and raise it to k whenever the k-th subsequent booking is also inside the window
		forvalues k = 2/30 {
			replace num_return_`w'm = `k' if !missing(time_gap`k') & time_gap`k' <= 30*`w'
		}
		replace num_return_`w'm = 0 if missing(num_return_`w'm)
		// window not fully observed by 5/10/2023: set to missing
		replace num_return_`w'm = . if (date("5/10/2023","MDY") - release_date < 30*`w')
	}
		
*********************************FIXING COST OF CRIME

	// Crime categories with a per-booking count variable num_<type>
	local crime_types person property po dui drug weapons traffic_other crime_other

	// Cost of crime and number of crimes by type, summed over ALL re-bookings
	// inside the 3, 6, 9 and 12 month windows.
	//
	// num_return_<i>m holds the number of re-bookings inside the window, so the
	// k-th subsequent booking of the same person (row _n+k within sg_id) belongs
	// to the window exactly when num_return_<i>m >= k.
	//
	// FIX: the first re-booking (k = 1) used to be added only when
	// num_return_<i>m == 1. For anyone with two or more returns in the window,
	// the cost and crime counts of the first return were left out of the total.
	// k = 1 is now handled by the same accumulation rule as k = 2..30.
	//
	// The numeric weights are per-offence cost figures by crime category; their
	// source and units are not stated in this file.
	foreach i in 3 6 9 12 {

		cap drop crime_costs_`i'm
		gen crime_costs_`i'm = 0
		foreach crime of local crime_types {
			cap drop num_`crime'_`i'm
			gen num_`crime'_`i'm = 0
		}

		forvalues j = 1/30 {
			// A missing num_return compares as larger than any number in Stata,
			// hence the explicit !mi() guard. A re-booking with any missing
			// num_<type> yields a missing cost and adds nothing.
			bys sg_id (booking_date release_date): ///
				gen temp_crime_costs_`i'm = 25351*num_person[_n+`j'] + ///
				3091*num_property[_n+`j'] + 1819*num_po[_n+`j'] + 83743*num_dui[_n+`j'] + ///
				10147*num_drug[_n+`j'] + 3725*num_weapons[_n+`j'] + ///
				10590*num_traffic_other[_n+`j'] + 501*num_crime_other[_n+`j'] ///
				if num_return_`i'm >= `j' & !mi(num_return_`i'm)
			replace crime_costs_`i'm = crime_costs_`i'm + temp_crime_costs_`i'm ///
				if !mi(temp_crime_costs_`i'm)
			drop temp_crime_costs_`i'm

			foreach crime of local crime_types {
				bys sg_id (booking_date release_date): ///
					gen temp_num_`crime'_`i'm = num_`crime'[_n+`j'] ///
					if num_return_`i'm >= `j' & !mi(num_return_`i'm)
				replace num_`crime'_`i'm = num_`crime'_`i'm + temp_num_`crime'_`i'm ///
					if !mi(temp_num_`crime'_`i'm)
				drop temp_num_`crime'_`i'm
			}
		}

		// Window not fully observed by 5/10/2023: outcomes are set to missing
		replace crime_costs_`i'm = . if (date("5/10/2023","MDY") - release_date < 30*`i')
		foreach crime of local crime_types {
			replace num_`crime'_`i'm = . if (date("5/10/2023","MDY") - release_date < 30*`i')
		}
	}

	// Carry the censoring codes of return_<v>m (negative values) over to the
	// derived outcomes: -999 for costs, -9 for counts
	foreach v in 3 6 9 12 {
		replace crime_costs_`v'm = -999 if return_`v'm<0
		replace num_return_`v'm = -9 if return_`v'm<0
		foreach crime of local crime_types {
			replace num_`crime'_`v'm = -9 if return_`v'm<0
		}
	}
			

	//Charged within three months
	// FIX: the two helper variables are dropped in separate capture statements.
	// A single "cap drop a b" drops NEITHER variable when only one of them
	// exists, and the gen below would then stop with "already defined".
		capture drop charge_time_gap
		capture drop charged_3m
		// days from this release to the person's next booking
		// NOTE(review): the !mi(case_id1) condition refers to the CURRENT booking's
		// case id, not the next booking's (case_id1[_n+1]) -- confirm this is intended.
		bys sg_id (booking_date release_date): gen charge_time_gap =  booking_date[_n+1] - release_date if !mi(case_id1)
		gen charged_3m = charge_time_gap<=30*3 if !mi(charge_time_gap)
		replace charged_3m=0 if mi(charged_3m)
		// window not fully observed by 5/10/2023: set to missing
		replace charged_3m=. if (date("5/10/2023","MDY") - release_date < 30*3)

	//Number of charges within three months: num_charges of the next booking,
	//floored at 1 whenever charged_3m is 1
		cap drop num_charged_3m
		bys sg_id (booking_date release_date): gen num_charged_3m = num_charges[_n+1] if charge_time_gap<=30*3&!mi(charge_time_gap)
		replace num_charged_3m=0 if mi(num_charged_3m)
		replace num_charged_3m=1 if num_charged_3m==0 &charged_3m ==1
		replace num_charged_3m=. if (date("5/10/2023","MDY") - release_date < 30*3)

	//Felony charge within three months: charged_3m when the next booking has D_felony==1, else 0
		cap drop charged_fel_3m
		bys sg_id (booking_date release_date): gen charged_fel_3m =  charged_3m if D_felony[_n+1]==1
		replace charged_fel_3m=0 if mi(charged_fel_3m)
		replace charged_fel_3m=. if (date("5/10/2023","MDY") - release_date < 30*3)

	//Censoring additional recidivism measures: carry over the negative codes of return_3m as -9
		foreach v in charged_fel_3m charged_3m num_charged_3m {
			replace `v' = -9 if return_3m<0
		}

	//Unique inmates observed in both counties (unique is a user-written command)
		unique sg_id if  sum_genesee&sum_saginaw

	//Quarter of the booking date
		gen quarter_booked = quarter(booking_date)

***** Saginaw-Genesee cross-county measure of recidivism
	save "$output_data/sag_gen_updated.dta", replace

/*
Genesee County Cross-County Recidivism Prediction Sample
*/
// Restrict the data in memory to the bookings listed in prediction_temp.dta
// (matched rows only; no variables are brought in from the using file)
merge 1:1 inmate booking_date using "$output_data/prediction_temp.dta", ///
	keepusing(inmate booking_date) keep(3)
sort sg_id booking_date, stable

// Drop variables that are missing for every remaining row (user-written command)
missings dropvars, force

preserve
	// Fit each outcome on bookings with booking_year <= 2015 and a non-negative
	// outcome value, using design and auxiliary controls, then store the
	// predicted probability for all rows as <outcome>_hat.
	foreach outcome in return_3m num_maj_weekly {
		capture drop `outcome'_hat
		logit `outcome' $design_control $auxiliary_control ///
			if `outcome' >= 0 & booking_year <= 2015, cluster(inmate)
		predict `outcome'_hat, pr
		if "`outcome'" == "return_3m" {
			summarize `outcome'_hat, detail
		}
	}

	// e_sample = 1 for bookings after 2015
	capture drop e_sample
	generate e_sample = (booking_year > 2015)

	// Prediction sample for Genesee County
	save "$output_data/prediction.dta", replace
restore

/*
Genesee County Cross-County Estimate Sample
*/
// Main sample: bookings with booking_year <= 2015 are removed
drop if booking_year <= 2015
sort inmate booking_date, stable
save "$output_data/estimate.dta", replace

/*
Saginaw County Cross-County Estimate Sample
*/
use "$output_data/sag_gen_updated.dta", clear

// Keep only the bookings listed in sag_post_roa_all_temp.dta
// (matched rows only; no variables are brought in from the using file)
merge 1:1 inmate booking_date using "$output_data/sag_post_roa_all_temp.dta", ///
	keepusing(inmate booking_date) keep(3)
save "$output_data/sag_post_roa_all.dta", replace

/*
Saginaw County Cross-County Post-IGNITE Estimate Sample
*/
// Booking months from September 2020 through May 2022 inclusive;
// rows with a missing booking_date are dropped
keep if inrange(mofd(booking_date), mofd(date("09/01/2020","MDY")), mofd(date("06/01/2022","MDY")) - 1)

// Rebuild ignite_0_rec as a constant zero for this sample
drop ignite_0_rec
generate ignite_0_rec = 0

save "$output_data/sag_post_roa.dta", replace